* Start from an empty session and turn off output paging.
clear all
set more off

* Load the master dataset.
use /Users/yasenov/Dropbox/data_dta/master.dta, clear

* id, grade and year are turned into strings only so they can be concatenated
* into the key variables below; they are converted back once the keys exist.
tostring id grade year, replace

generate course        = grade + " " + subject + " " + cohort
generate class         = year + " " + course
generate studentxclass = id + " " + class
* gradebook joins its parts without a separator, unlike the keys above.
generate gradebook     = year + grade + cohort

destring id grade year, replace

/* PRELIMINARY CODE */
****** num_year: total of num_month over all rows of a student-class (id x class)
****** whose month is one of 1..10.
* Rows with any other (or missing) month contribute 0, and a missing num_month
* counts as 0 because egen total() ignores missing values. This is the same
* quantity as building ten per-month totals and adding them up, computed in a
* single pass and without scratch variables.
* NOTE(review): the total runs over every row of the student-class. If the data
* hold one row per mark and num_month already stores the monthly count on each
* of those rows, a month with n marks contributes n*n instead of n, which would
* explain the inflated mean reported in section 4 -- confirm how num_month is
* built before relying on num_year.
bysort id class: egen num_year = total(cond(inlist(month, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), num_month, 0))

*************
*************
*************

*** 1 by assignment - mean marks
* Summary statistics of marks over every row currently in memory.
summarize marks

*** 2 by student - mean male, bulg
* Average male and bulg within each id, then summarize those per-id averages.
* preserve/restore keeps the full dataset intact for the following sections.
preserve
collapse (mean) male bulg, by(id)
summarize male bulg
restore

*** 3 by class - mean num students, stem - NOTE I CHANGED GEOGRAPHY TO NOT BEING STEM
* Students per class: reduce the data to one row per (id, class) pair, count
* those rows within each class, then summarize the per-class counts.
preserve
gen byte ones = 1
* One row per student-class; the mean of a constant 1 stays 1.
collapse (mean) ones, by(id class)
* Summing the 1s within a class gives the number of distinct ids in it.
collapse (sum) n_students = ones, by(class)
summarize n_students
restore

* Class-level mean of stem, summarized across classes.
preserve
collapse (mean) stem, by(class)
summarize stem
restore

*** 4 by student-class - mean # assignments
* Flagged by the author for rework: the resulting number looks far too large.
* Reduce to one row per (id, class) carrying the mean of num_year, then
* summarize across student-classes; the full dataset is restored afterwards.
preserve
collapse (mean) num_year, by(id class)
summarize num_year
restore

* Collapse the data in memory to one row per (id, class) holding the sum of f.
* Unlike the sections above, this is not wrapped in preserve/restore, so the
* collapsed data replace the full dataset in memory from here on.
* NOTE(review): f is not created anywhere in the visible code -- presumably a
* variable (or an unambiguous variable abbreviation) from master.dta; confirm.
collapse (sum) f, by(id class)
